{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f86bc499",
   "metadata": {},
   "source": [
    "## download ViCILP weights and put its pth file in viclip folder. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7a90379-d9ee-45d9-9073-7ed5132fa6b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "import cv2\n",
    "\n",
    "try:\n",
    "    from viclip import get_viclip, retrieve_text, _frame_from_video\n",
    "except:\n",
    "    from .viclip import get_viclip, retrieve_text, _frame_from_video"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a425a5da-ceaf-4b89-9845-c8ba576902d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "video = cv2.VideoCapture('example1.mp4')\n",
    "frames = [x for x in _frame_from_video(video)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e6c1cd7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# modify xxx to the path of the pretrained model\n",
    "model_cfgs = {\n",
    "    'viclip-l-internvid-10m-flt': {\n",
    "        'size': 'l',\n",
    "        'pretrained': 'xxx/ViCLIP-L_InternVid-FLT-10M.pth',\n",
    "    },\n",
    "    'viclip-l-internvid-200m': {\n",
    "        'size': 'l',\n",
    "        'pretrained': 'xxx/ViCLIP-L_InternVid-200M.pth',\n",
    "    },\n",
    "    'viclip-b-internvid-10m-flt': {\n",
    "        'size': 'b',\n",
    "        'pretrained': 'xxx/ViCLIP-B_InternVid-FLT-10M.pth',\n",
    "    },\n",
    "    'viclip-b-internvid-200m': {\n",
    "        'size': 'b',\n",
    "        'pretrained': 'xxx/ViCLIP-B_InternVid-200M.pth',\n",
    "    },\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3fb7397a-02ef-41b5-9ffe-f2363b277778",
   "metadata": {},
   "outputs": [],
   "source": [
    "text_candidates = [\"A playful dog and its owner wrestle in the snowy yard, chasing each other with joyous abandon.\",\n",
    "                   \"A man in a gray coat walks through the snowy landscape, pulling a sleigh loaded with toys.\",\n",
    "                   \"A person dressed in a blue jacket shovels the snow-covered pavement outside their house.\",\n",
    "                   \"A pet dog excitedly runs through the snowy yard, chasing a toy thrown by its owner.\",\n",
    "                   \"A person stands on the snowy floor, pushing a sled loaded with blankets, preparing for a fun-filled ride.\",\n",
    "                   \"A man in a gray hat and coat walks through the snowy yard, carefully navigating around the trees.\",\n",
    "                   \"A playful dog slides down a snowy hill, wagging its tail with delight.\",\n",
    "                   \"A person in a blue jacket walks their pet on a leash, enjoying a peaceful winter walk among the trees.\",\n",
    "                   \"A man in a gray sweater plays fetch with his dog in the snowy yard, throwing a toy and watching it run.\",\n",
    "                   \"A person bundled up in a blanket walks through the snowy landscape, enjoying the serene winter scenery.\"]\n",
    "\n",
    "cfg = model_cfgs['viclip-l-internvid-10m-flt']\n",
    "model_l = get_viclip(cfg['size'], cfg['pretrained'])\n",
    "texts, probs = retrieve_text(frames, text_candidates, models=model_l, topk=5)\n",
    "\n",
    "for t, p in zip(texts, probs):\n",
    "    print(f'text: {t} ~ prob: {p:.4f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a2969ba6-19d0-4893-b071-b82fa046c312",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text: A playful dog and its owner wrestle in the snowy yard, chasing each other with joyous abandon. ~ prob: 0.8192\n",
      "text: A man in a gray sweater plays fetch with his dog in the snowy yard, throwing a toy and watching it run. ~ prob: 0.1084\n",
      "text: A pet dog excitedly runs through the snowy yard, chasing a toy thrown by its owner. ~ prob: 0.0676\n",
      "text: A playful dog slides down a snowy hill, wagging its tail with delight. ~ prob: 0.0047\n",
      "text: A person dressed in a blue jacket shovels the snow-covered pavement outside their house. ~ prob: 0.0002\n"
     ]
    }
   ],
   "source": [
    "cfg = model_cfgs['viclip-b-internvid-10m-flt']\n",
    "model_b = get_viclip(cfg['size'], cfg['pretrained'])\n",
    "texts, probs = retrieve_text(frames, text_candidates, models=model_b, topk=5)\n",
    "\n",
    "for t, p in zip(texts, probs):\n",
    "    print(f'text: {t} ~ prob: {p:.4f}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
